Gruppe D

Philipp Daiss, Tobias Schmitt, Jens Stöhr und Peter Kowalczyk

Die Grafiken zu den Aufgaben 2-4 werden zusätzlich als Datei mit abgegeben, da Markdown das Bild nicht wie gewünscht darstellt.

Aufgabe 1: Apartments in Würzburg

require(tidyverse)
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 3.2.5
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.2.5
## Warning: package 'tibble' was built under R version 3.2.5
## Warning: package 'tidyr' was built under R version 3.2.5
## Warning: package 'readr' was built under R version 3.2.5
## Warning: package 'purrr' was built under R version 3.2.5
## Warning: package 'dplyr' was built under R version 3.2.5
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
require(rvest)
## Loading required package: rvest
## Warning: package 'rvest' was built under R version 3.2.5
## Loading required package: xml2
## Warning: package 'xml2' was built under R version 3.2.5
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
require(jsonlite)
## Loading required package: jsonlite
## Warning: package 'jsonlite' was built under R version 3.2.5
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
require(ggmap)
## Loading required package: ggmap
## Warning: package 'ggmap' was built under R version 3.2.5

(a) Getting a list of listings - Ausgangslage

# Base URL split around the page number: page i of the Würzburg rental
# search is url1 + i + url2.
url1 = "https://www.immobilienscout24.de/Suche/S-T/P-"
url2 = "/Wohnung-Miete/Bayern/Wuerzburg?pagerReporting=true"

# Accumulator for the listing links of all result pages.
allResults = NULL

# NOTE(review): the page count 6 is hard-coded; fewer result pages cause an
# error, additional pages are silently missed. getApartment() below reads
# the pager instead.
for (i in 1:6)
{
  URL = paste0(url1,i,url2)
  # Extract the href of every listing entry on this result page.
  URL %>%
    read_html() %>%
    html_nodes(".result-list-entry__brand-title-container") %>%
    html_attr("href") -> tmp
  allResults = c(allResults,tmp)
}

i steht für die möglichen Seiten der Suche. url1, url2 und der Zähler i bilden zusammen die URL. Daraus entsteht eine Abfrage, die den HTML-Code einliest und nach Elementen mit der Klasse 'result-list-entry__brand-title-container' sucht. Aus diesen Elementen wird das Attribut href ausgelesen und durch Anhängen an einen Vektor in die Variable allResults geschrieben. Probleme treten in zwei Richtungen auf: Gibt es weniger Angebote (und damit weniger Seiten), kommt es zu einer Fehlermeldung; gibt es mehr Seiten, werden nicht alle Angebote erfasst.

getApartment<- function (city){
  # Collect the relative listing URLs ("/expose/<id>") for every result page
  # of the ImmobilienScout24 rental search in the given Bavarian city.
  #
  # city: city name; umlauts are transliterated (ae/ue/oe) so the name can
  #       be embedded in the URL.
  # Returns a character vector of relative listing paths.
  allResultsNew <- NULL

  # Build the URL of the first result page.
  firstURL <- 'https://www.immobilienscout24.de/Suche/S-T/Wohnung-Miete/Bayern/'
  city <- gsub(pattern = '[ä]', replacement = "ae", city)
  city <- gsub(pattern = '[ü]', replacement = "ue", city)
  city <- gsub(pattern = '[ö]', replacement = "oe", city)
  firstURL <- paste0(firstURL, city)

  # Read the pager drop-down to learn how many result pages exist; each
  # <option value> holds the relative URL of one page.
  pageSelektor <- firstURL %>%
    read_html() %>%
    html_nodes("#pageSelection select option") %>%
    html_attr("value")

  mainUrl <- 'https://www.immobilienscout24.de'
  # seq_along() instead of 1:length(): an empty pager (single result page
  # without a drop-down) then yields zero iterations instead of the bogus
  # indices 1 and 0.
  for(i in seq_along(pageSelektor)){
    URL <- paste0(mainUrl, pageSelektor[i])
    # Extract the href of every listing entry on this page.
    URL %>%
      read_html() %>%
      html_nodes(".result-list-entry__brand-title-container") %>%
      html_attr("href") -> tmp
    allResultsNew <- c(allResultsNew, tmp)
  }
  return (allResultsNew)
}

# Scrape the listing paths for four Bavarian cities (umlauts are handled
# inside getApartment).
ApartmentWue <- getApartment('Wuerzburg')
ApartmentBam <- getApartment('Bamberg')
ApartmentBay <- getApartment('Bayreuth')
ApartmentAug <- getApartment('Augsburg')

# Print the scraped Würzburg listing paths.
ApartmentWue
##   [1] "/expose/94818251" "/expose/95187153" "/expose/87409778"
##   [4] "/expose/95679720" "/expose/87152588" "/expose/95568270"
##   [7] "/expose/95028544" "/expose/94846454" "/expose/95404536"
##  [10] "/expose/95785022" "/expose/95923053" "/expose/95913899"
##  [13] "/expose/95873535" "/expose/95867642" "/expose/95840545"
##  [16] "/expose/95832659" "/expose/95831007" "/expose/95813960"
##  [19] "/expose/95806539" "/expose/95805058" "/expose/95802506"
##  [22] "/expose/95779345" "/expose/95709146" "/expose/95688766"
##  [25] "/expose/95688642" "/expose/95620872" "/expose/95541644"
##  [28] "/expose/95475395" "/expose/95465588" "/expose/95412519"
##  [31] "/expose/95297797" "/expose/95260941" "/expose/95217270"
##  [34] "/expose/95082027" "/expose/95074701" "/expose/95067986"
##  [37] "/expose/94995948" "/expose/94943106" "/expose/94942637"
##  [40] "/expose/94847683" "/expose/94826185" "/expose/94809562"
##  [43] "/expose/94808936" "/expose/94806688" "/expose/94757401"
##  [46] "/expose/94700676" "/expose/94612291" "/expose/94470867"
##  [49] "/expose/94442425" "/expose/94364931" "/expose/94304886"
##  [52] "/expose/93860417" "/expose/93808937" "/expose/93794011"
##  [55] "/expose/93755640" "/expose/93372905" "/expose/93370302"
##  [58] "/expose/93090839" "/expose/93069103" "/expose/93061387"
##  [61] "/expose/93059945" "/expose/92668262" "/expose/92665654"
##  [64] "/expose/91517844" "/expose/91075291" "/expose/90830060"
##  [67] "/expose/90481624" "/expose/90257861" "/expose/90041927"
##  [70] "/expose/89766602" "/expose/89466269" "/expose/88932047"
##  [73] "/expose/88911037" "/expose/88765818" "/expose/88349589"
##  [76] "/expose/86808654" "/expose/86808615" "/expose/85625092"
##  [79] "/expose/85252877" "/expose/84990183" "/expose/84974867"
##  [82] "/expose/83972017" "/expose/83963661" "/expose/83944141"
##  [85] "/expose/82856550" "/expose/82856534" "/expose/82856532"
##  [88] "/expose/82856444" "/expose/82856418" "/expose/82856416"
##  [91] "/expose/82856337" "/expose/82856273" "/expose/82856247"
##  [94] "/expose/82856239" "/expose/81421109" "/expose/75514953"
##  [97] "/expose/72267066" "/expose/48128650" "/expose/95837675"
## [100] "/expose/95832263" "/expose/95829876"
# Print the scraped Bamberg listing paths.
ApartmentBam
##  [1] "/expose/93557573" "/expose/94751148" "/expose/68045960"
##  [4] "/expose/95125095" "/expose/94758512" "/expose/94925272"
##  [7] "/expose/94925414" "/expose/95831108" "/expose/87633975"
## [10] "/expose/95569057" "/expose/95283775" "/expose/79888195"
## [13] "/expose/95987141" "/expose/95950007" "/expose/95912942"
## [16] "/expose/95807676" "/expose/95807575" "/expose/95781655"
## [19] "/expose/95761887" "/expose/95759522" "/expose/95756468"
## [22] "/expose/95736982" "/expose/95736980" "/expose/95736973"
## [25] "/expose/95736383" "/expose/95683996" "/expose/95662182"
## [28] "/expose/95588993" "/expose/95540676" "/expose/95470826"
## [31] "/expose/95449890" "/expose/95406557" "/expose/95308418"
## [34] "/expose/95203430" "/expose/95185103" "/expose/95155373"
## [37] "/expose/95050027" "/expose/94957584" "/expose/94920612"
## [40] "/expose/94865300" "/expose/94600543" "/expose/94536613"
## [43] "/expose/94535025" "/expose/94403688" "/expose/94160526"
## [46] "/expose/93687739" "/expose/93446459" "/expose/92922697"
## [49] "/expose/92911036" "/expose/92282596" "/expose/90573621"
## [52] "/expose/89488397" "/expose/89206439" "/expose/85758955"
## [55] "/expose/85304729" "/expose/84004573" "/expose/80313193"
## [58] "/expose/79872456" "/expose/78761604" "/expose/76625529"
## [61] "/expose/75018974" "/expose/69651900" "/expose/57832571"
## [64] "/expose/89855806" "/expose/84445618" "/expose/84292856"
## [67] "/expose/83149017" "/expose/83004803" "/expose/82860886"
## [70] "/expose/66104508" "/expose/50730259" "/expose/35888561"

(b) Getting listing details

Wäre auch als Schleife möglich, jedoch wären Ausnahmen für Bonität und Internet nötig.

getApartmentDetails<-function(detailURL){
  # Scrape one listing's detail page and return a one-row tibble with the
  # exposé id, address, geocoded coordinates and the main listing attributes.
  #
  # detailURL: relative listing path as returned by getApartment(),
  #            e.g. "/expose/94818251".

  # Helper: text of all nodes matching `selector`, blanks stripped, with an
  # empty match mapped to NA so every tibble column is always filled.
  scrapeField <- function(page, selector){
    value <- page %>%
      html_nodes(selector) %>%
      html_text()
    value <- gsub(pattern = ' ', replacement = "", value)
    if(length(value) == 0){
      value <- NA
    }
    value
  }

  exposeID <- gsub('/expose/', '', detailURL)

  # Fetch the page once and reuse the parsed document for every field.
  page <- paste0('https://www.immobilienscout24.de', detailURL) %>%
    read_html()

  # Cold rent: first match only, blanks removed, trailing currency sign cut.
  kaltmiete <- page %>%
    html_nodes(".is24qa-kaltmiete") %>%
    html_text()
  kaltmiete <- gsub(pattern = ' ', replacement = "", kaltmiete[1])
  kaltmiete <- substr(kaltmiete, 1, nchar(kaltmiete) - 1)
  if(length(kaltmiete) == 0){
    kaltmiete <- NA
  }

  # Total rent: strip blanks, drop a parenthesised remark, cut currency sign.
  gesamtmiete <- page %>%
    html_nodes(".is24qa-gesamtmiete") %>%
    html_text()
  gesamtmiete <- gsub(pattern = ' ', replacement = "", gesamtmiete)
  gesamtmiete <- gsub(pattern = '\\([^\\)]*\\)', replacement = "", gesamtmiete)
  gesamtmiete <- substr(gesamtmiete, 1, nchar(gesamtmiete) - 1)
  if(length(gesamtmiete) == 0){
    gesamtmiete <- NA
  }

  # Utilities: zero when cold rent equals total rent, otherwise scraped.
  # FIX: the original compared with `==` directly, which aborts with
  # "missing value where TRUE/FALSE needed" as soon as either rent is NA;
  # isTRUE() is NA-safe and falls through to scraping in that case.
  if(isTRUE(kaltmiete == gesamtmiete)){
    nebenkosten <- '0'
  }else{
    nebenkosten <- page %>%
      html_nodes(".is24qa-nebenkosten") %>%
      html_text()
    nebenkosten <- gsub(pattern = '[+]', replacement = "", nebenkosten)
    nebenkosten <- gsub(pattern = ' ', replacement = "", nebenkosten)
    nebenkosten <- substr(nebenkosten, 1, nchar(nebenkosten) - 1)
    # "keineAngab" (truncated "keine Angabe") means no value given.
    nebenkosten <- gsub(pattern = 'keineAngab', replacement = NA, nebenkosten)
    if(length(nebenkosten) == 0){
      nebenkosten <- NA
    }
  }

  # The remaining single-selector attributes all share the same
  # scrape/clean/NA pattern.
  zimmer          <- scrapeField(page, ".is24qa-zimmer")
  flaeche         <- scrapeField(page, ".is24qa-wohnflaeche-ca")
  wohnungstyp     <- scrapeField(page, ".is24qa-wohnungstyp")
  haustiere       <- scrapeField(page, ".is24qa-haustiere")
  parkplatz       <- scrapeField(page, ".is24qa-garage-stellplatz")
  anzahlParkplatz <- scrapeField(page, ".is24qa-anzahl-garage-stellplatz")

  # The fourth breadcrumb entry holds the district name.
  district <- page %>%
    html_nodes(".breadcrumb__link") %>%
    html_text()
  district <- gsub(pattern = ' ', replacement = "", district[4])

  # Street address: second address block, whitespace collapsed, and the
  # "address only available from the provider" notice removed.
  street <- page %>%
    html_nodes(".address-block") %>%
    html_text()
  street <- street[2]
  street <- gsub(pattern = "[ ]{1,}", " ", street)
  street <- gsub('Die vollständige Adresse der Immobilie erhalten Sie vom Anbieter.', "", street)
  streetName <- trimws(street)
  # Drop the trailing ", <city>" part before geocoding.
  streetName <- gsub(pattern = ", [^ ]+$", "", streetName)

  # Geocode via the Google Maps API; '+' is the URL encoding for spaces.
  street <- gsub(" ", "+", streetName)
  result <- tryCatch({
    geocode <- fromJSON(paste0("https://maps.googleapis.com/maps/api/geocode/json?address=", street, "+,Germany"))
    c(geocode$results$geometry$location$lng[1],
      geocode$results$geometry$location$lat[1])
  }, error = function(e){
    # FIX: the original handler returned the condition object itself, so a
    # failed request leaked an error object into the coordinate columns;
    # NULL makes the NA fallback below work as intended.
    NULL
  })

  if(is.null(result)){
    longitude <- NA
    latitude <- NA
  }else{
    longitude <- result[1]
    latitude <- result[2]
  }

  return (tibble(exposeID = exposeID,
                 strasse = streetName,
                 stadtteil = district,
                 latitude = latitude,
                 longitude = longitude,
                 kaltmiete = kaltmiete,
                 nebenkosten = nebenkosten,
                 gesamtmiete = gesamtmiete,
                 zimmer = zimmer,
                 flaeche = flaeche,
                 wohnungstyp = wohnungstyp,
                 haustiere = haustiere,
                 parkplatz = parkplatz,
                 anzahlParkplatz = anzahlParkplatz
                 )
          )
}

(c) Putting it all together

Vorbereitung

# Fetch the detail tibble for every Würzburg listing and row-bind the results.
WueDf<- map_df(ApartmentWue,getApartmentDetails)
WueDf
## # A tibble: 101 × 14
##    exposeID                                      strasse    stadtteil
##       <chr>                                        <chr>        <chr>
## 1  94818251              Ruppertsgasse 4, 97084 Würzburg Heidingsfeld
## 2  95187153                               97084 Würzburg Heidingsfeld
## 3  87409778 Rottendorfer Straße 51/53/55, 97074 Würzburg    Mönchberg
## 4  95679720                               97072 Würzburg          Dom
## 5  87152588      Rottendorfer Straße 53a, 97074 Würzburg    Mönchberg
## 6  95568270                Wöllergasse 2, 97070 Würzburg   Neumünster
## 7  95028544                               97082 Würzburg  Mainviertel
## 8  94846454            Leistenstrasse 63, 97082 Würzburg Nikolausberg
## 9  95404536                               97070 Würzburg          Dom
## 10 95785022            Sartoriusstraße 3, 97072 Würzburg      Rennweg
## # ... with 91 more rows, and 11 more variables: latitude <dbl>,
## #   longitude <dbl>, kaltmiete <chr>, nebenkosten <chr>,
## #   gesamtmiete <chr>, zimmer <chr>, flaeche <chr>, wohnungstyp <chr>,
## #   haustiere <chr>, parkplatz <chr>, anzahlParkplatz <chr>
# Convert the scraped character columns into numerics. German price strings
# like "1.234,56" first lose the thousands dot, then the decimal comma
# becomes a dot so as.double() can parse them.
# FIX: the original additionally piped the string through
# format(digits = 2, small.mark = "+", ...), which on a character vector only
# pads with spaces that the next gsub() removed again - a no-op, dropped.
WueDf$gesamtmiete <- gsub(pattern = '\\.', replacement = "", WueDf$gesamtmiete)
WueDf$gesamtmieteNeu <- gsub(pattern = ',', replacement = ".", WueDf$gesamtmiete)
WueDf$gesamtmieteNeu <- as.double(WueDf$gesamtmieteNeu)

# Living area: cut the trailing two-character unit, then parse the decimal
# comma.
WueDf$flaecheNeu <- substr(WueDf$flaeche, 1, nchar(WueDf$flaeche) - 2)
WueDf$flaecheNeu <- gsub(pattern = ' ', replacement = "", WueDf$flaecheNeu)
WueDf$flaecheNeu <- gsub(pattern = ',', replacement = ".", WueDf$flaecheNeu)
WueDf$flaecheNeu <- as.double(WueDf$flaecheNeu)

# Price per square metre from total rent and living area.
WueDf$preisQm <- (WueDf$gesamtmieteNeu / WueDf$flaecheNeu)

Visualisierung als Boxplot

# Boxplot of the price per square metre by district; district names are
# abbreviated so the x axis stays readable.
g2 <- ggplot(WueDf, aes(x = stadtteil, y = preisQm)) +
  geom_boxplot() +
  scale_x_discrete(labels = abbreviate)
g2
## Warning in f(...): abbreviate mit nicht-ASCII Zeichen genutzt

Visualisierung als Karte. Farbliche Zuordnung der Stadtteile + Preisniveau

# Map the price per square metre onto a coarse ordinal scale:
# ' ' up to 10, '+' above 10, '++' above 14, '+++' above 19.
# Non-overlapping range masks replace the original cumulative overwrites.
WueDf$preisniveau <- ' '
WueDf$preisniveau[WueDf$preisQm > 10 & WueDf$preisQm <= 14] <- '+'
WueDf$preisniveau[WueDf$preisQm > 14 & WueDf$preisQm <= 19] <- '++'
WueDf$preisniveau[WueDf$preisQm > 19] <- '+++'
  



# Hybrid Google base map centred on Würzburg.
basemap <- get_map("Wuerzburg",
                   source = "google",
                   maptype = "hybrid",
                   zoom = 12)
# Base layer carries the coordinate mapping.
# FIX: removed the stray base-graphics title() call that sat inside ggplot()
# (never used by ggplot2) and the unused title="test" argument to ggmap().
g = ggplot(WueDf, aes(x = longitude,
                      y = latitude))

map = ggmap(basemap, base_layer = g)
map = map + geom_point(size = 2, aes(color = stadtteil))
map = map + ggtitle('Lage und Qm Preis ')
# FIX: the legend text now matches the actual thresholds (>10, >14, >19)
# used for preisniveau; the original said ">9".
map = map + xlab(" > 10 = +, > 14 = ++ , > 19 = +++")
map = map + ylab("")
# Bare column name in aes() instead of WueDf$preisniveau keeps the label
# aligned with the data= argument.
map = map + geom_text(data = WueDf, check_overlap = TRUE, aes(label = preisniveau), hjust = 0, color = "white")

map
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text).

#Aufgabe 2: Comey is going down

require(xlsx)
## Loading required package: xlsx
## Warning: package 'xlsx' was built under R version 3.2.5
## Loading required package: rJava
## Warning: package 'rJava' was built under R version 3.2.5
## Loading required package: xlsxjars
## Warning: package 'xlsxjars' was built under R version 3.2.5
require(ggplot2)
require(tidyverse)
require(RGraphics)
## Loading required package: RGraphics
## Warning: package 'RGraphics' was built under R version 3.2.5
## Loading required package: grid
require(ggrepel)
## Loading required package: ggrepel
## Warning: package 'ggrepel' was built under R version 3.2.5
# NOTE(review): hard-coded absolute Windows path - adjust to your local copy
# of comey-predictit.xlsx before running.
comey_predictit <- read.xlsx("C:/Users/Tobi/Dropbox/Data Science/DataScience/Problemset2/comey-predictit.xlsx",sheetIndex = 1)


# Build a data frame with the average of open and close share price, and with
# DateString as a factor whose levels run from newest to oldest.
# FIX: bare column names inside mutate() instead of comey_predictit$... so
# the verbs refer to the piped data frame (dplyr data masking).
AvgPrices <- comey_predictit %>%
  mutate(AvgSharePrice = (OpenSharePrice + CloseSharePrice) / 2) %>%
  mutate(DateString = factor(DateString, levels = rev(unique(DateString))))


#plot generieren
# Line chart of the average share price; coord_flip() puts the dates on the
# vertical axis.
p<- ggplot(AvgPrices, aes(DateString, AvgSharePrice, group = 1, col="red")) +
  geom_line(color = "deepskyblue",  size = 1.7)+
  coord_flip () +
  # Horizontal reference lines every 20 likelihood points.
  geom_hline(yintercept = 20, color = "steelblue") +
  geom_hline(yintercept = 40, color = "steelblue") +
  geom_hline(yintercept = 60, color = "steelblue") +
  geom_hline(yintercept = 80, color = "steelblue") +
  geom_hline(yintercept = 100, color = "steelblue") +
  labs(title = "PredictIt market on whether Mr Comey would keep his job until June 30th 2017", x = "2017", y = "Likelihood, %", color="red") +
  scale_y_continuous(position="top",breaks=seq(0, 100, by=20),sec.axis = dup_axis(name = waiver())) +
  # Blank out the daily date labels and show one centred month label instead.
  # FIX: six duplicated "2017-03-02" entries were removed from the label
  # vector; duplicates were ignored anyway.
  scale_x_discrete("2017", labels = c("2017-02-13" = "","2017-02-14" = "","2017-02-15" = "","2017-02-16" = "","2017-02-17" = "","2017-02-18" = "",
                                      "2017-02-19" = "","2017-02-20" = "Feb","2017-02-21" = "","2017-02-22" = "","2017-02-23" = "","2017-02-24" = "",
                                      "2017-02-25" = "","2017-02-26" = "","2017-02-27" = "","2017-02-28" = "","2017-03-01" = "","2017-03-02" = "",
                                      "2017-03-03" = "","2017-03-04" = "","2017-03-05" = "","2017-03-06" = "","2017-03-07" = "","2017-03-08" = "",
                                      "2017-03-09" = "","2017-03-10" = "","2017-03-11" = "","2017-03-12" = "","2017-03-13" = "","2017-03-14" = "",
                                      "2017-03-15" = "Mar","2017-03-16" = "","2017-03-17" = "","2017-03-18" = "","2017-03-19" = "","2017-03-20" = "",
                                      "2017-03-21" = "","2017-03-22" = "","2017-03-23" = "","2017-03-24" = "","2017-03-25" = "","2017-03-26" = "",
                                      "2017-03-27" = "","2017-03-28" = "","2017-03-29" = "","2017-03-30" = "","2017-03-31" = "","2017-04-01" = "",
                                      "2017-04-02" = "","2017-04-03" = "","2017-04-04" = "","2017-04-05" = "","2017-04-06" = "","2017-04-07" = "",
                                      "2017-04-08" = "","2017-04-09" = "","2017-04-10" = "","2017-04-11" = "","2017-04-12" = "","2017-04-13" = "",
                                      "2017-04-14" = "","2017-04-15" = "Apr","2017-04-16" = "","2017-04-17" = "","2017-04-18" = "","2017-04-19" = "",
                                      "2017-04-20" = "","2017-04-21" = "","2017-04-22" = "","2017-04-23" = "","2017-04-24" = "","2017-04-25" = "",
                                      "2017-04-26" = "","2017-04-27" = "","2017-04-28" = "","2017-04-29" = "","2017-04-30" = "","2017-05-01" = "",
                                      "2017-05-02" = "","2017-05-03" = "","2017-05-04" = "","2017-05-05" = "","2017-05-06" = "","2017-05-07" = "May",
                                      "2017-05-08" = "","2017-05-09" = "","2017-05-10" = "","2017-05-11" = "","2017-05-12" = "","2017-05-13" = "")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_rect(fill ="white"))


#Annotations 
# Each news event gets a red marker plus a repelled text label. The helper
# replaces six copies of the same geom_point()/geom_text_repel() pair.
# Typos in the displayed texts were corrected (approaches, mildly, thought).
annotateEvent <- function(plot, eventDate, eventLabel, nudgeX, nudgeY){
  eventData <- subset(AvgPrices, DateString == eventDate)
  plot +
    geom_point(data = eventData, color = "red") +
    geom_text_repel(data = eventData, label = eventLabel, fontface = 'bold',
                    color = 'black', nudge_x = nudgeX, nudge_y = nudgeY,
                    point.padding = unit(1, "lines"), segment.color = 'red')
}

p <- annotateEvent(p, "2017-03-03", "Democrats in Congress criticise Mr Comey 
          for his differing approaches to handling 
          the cases involving Mr Trump and Mrs Clinton", 18, -50)

p <- annotateEvent(p, "2017-03-06", "Mr Trump accuses Barack Obama's administration 
        of wiretapping his phones during the presidential campaign", 5, -40)

p <- annotateEvent(p, "2017-03-19", "Mr Comey testifies to Congress. He dismisses 
        Mr Trump's wiretap claims, and confirms the FBI is investigating 
        links between Russia and Mr Trump's campaign", 0, -50)

p <- annotateEvent(p, "2017-04-13", "The media reports that the FBI obtained a 
            warrant to investigate links between Carter Page, an informal 
            adviser to Mr Trump, and Russian officials", 10, -55)

p <- annotateEvent(p, "2017-05-04", "Mr Comey gives testimony saying he felt 'mildly 
nauseous' at the thought that the FBI probe of Mrs Clinton may have affected 
the election. He also says the FBI found 'hundreds of thousands' of e-mails 
relating to Mrs Clinton on Mr Weiner's laptop", 20, -55)

p <- annotateEvent(p, "2017-05-13", "The FBI issues a statement correcting Mr Comey's 
            testimony on May 3rd, saying the e-mails found on Mr Weiner's laptop mostly resulted 
            from a backup process. Mr Trump sacks Mr Comey, citing the advice of the attorney-general, 
            over his handling of the Clinton investigation", 10, 5)

#assign color to axis
# Colour the plot title and both axis titles.
p <- p + theme(
  plot.title   = element_text(color = "deepskyblue", size = 14, face = "bold"),
  axis.title.x = element_text(color = "black", size = 11, hjust = 1, vjust = 1),
  axis.title.y = element_text(color = "red", size = 14, face = "bold", angle = 360)
)

p

Aufgabe 3: Zeit Germany Map

require(ggrepel)
require(ggmap)
require(tidyverse)
require(WikidataR)
## Loading required package: WikidataR
require(WikidataQueryServiceR)
## Loading required package: WikidataQueryServiceR
# Build a data frame of German UNESCO World Heritage sites from Wikidata.
# The SPARQL below is passed verbatim to the Wikidata Query Service; it pulls
# each site's entity, label and geographic coordinates.
World_Heritages = query_wikidata('#List of World Heritages in Germany
                                 #defaultView:Map
                                 SELECT DISTINCT ?heritage ?heritageLabel ?lat ?lon
                                 WHERE 
                                 {
                                 ?heritage wdt:P1435 wd:Q9259 ;
                                 wdt:P17 wd:Q183 ;
                                 wdt:P625 ?coord .
                                 ?heritage p:P625 ?coordinate .
                                 ?coordinate psv:P625 ?coordinate_node .
                                 ?coordinate_node wikibase:geoLatitude ?lat .
                                 ?coordinate_node wikibase:geoLongitude ?lon .
                                 SERVICE wikibase:label { bd:serviceParam wikibase:language "en, de". }
                                 }
                                 ORDER BY ?date')
## 86 rows were returned by WDQS
# Drop the two duplicate rows in the result set (caused by entities carrying
# more than one longitude). Deduplication is sequential: first by entity
# URI, then by label on the remaining rows.
World_Heritages <- subset(World_Heritages, !duplicated(heritage))
World_Heritages <- subset(World_Heritages, !duplicated(heritageLabel))

# Remove the "prehistoric pile dwellings": Wikidata appears to pick the
# first site (alphabetically) from the table of all pile dwellings.
# [OPTIONAL] Skip this statement if no such restriction is wanted.
World_Heritages <- subset(World_Heritages, heritageLabel != "prehistoric pile dwellings around the Alps")


# [Recommended] Remove two heritage sites that Wikidata assigns to Germany
# (among other countries) although their coordinates lie outside Germany.
# Keeping them produces a warning because they fall outside the map extent
# at this zoom level. Skip these statements if no restriction is wanted.
World_Heritages <- subset(World_Heritages, heritageLabel != "Borders of the Roman Empire")
World_Heritages <- subset(World_Heritages, heritageLabel != "Primeval Beech Forests of the Carpathians and the Ancient Beech Forests of Germany")



#Eerste Mapping-Möglichkeit: 
  #mit get_map-Funktion, wie im Aufgabenblatt vorgeschlagen. 
  #Hier werden jedoch Länder und Städtenamen mitdargestellt, was wiederum die Übersichtlichkeit der Grafik meiner Ansicht nach schmälert. Hier ist weniger mehr. Ich bevorzuge die zweite Mapping-Möglichkeit.
  
  #Der nachfolgende Code erstellt die Karte
  # Google terrain map of Germany.
  # FIX: force = ifelse(source == "google", TRUE, TRUE) was a no-op
  # conditional that additionally referenced `source` from the calling frame
  # (where it resolves to base::source, a function); replaced by TRUE.
  # FIX: the trailing unnamed `api_key` argument was an undefined symbol and
  # has been removed.
  map_ggmap <- get_map(location = 'germany', zoom = 6,
                 scale = "auto", maptype = "terrain", source = "google",
                 force = TRUE, messaging = FALSE, urlonly = FALSE,
                 filename = "ggmapTemp", crop = TRUE, color = "color",
                 language = "de-DE")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=germany&zoom=6&size=640x640&scale=2&maptype=terrain&language=de-DE&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=germany&sensor=false
  # Plot the heritage sites on the map by longitude/latitude. Two sites that
  # lie outside the map extent were removed above (also used for p2).
  # FIX: bare column names in aes() instead of World_Heritages$... keep the
  # mapping aligned with the data= argument.
    p <- ggmap(map_ggmap) + geom_point(
    aes(x = lon, y = lat), col = "red", alpha = 0.4, size = 2, 
    data = World_Heritages)
  
  # Map with a point for every UNESCO World Heritage site in Germany.
  p

  # Three labelling options:
  # FIX: bare column names in aes() instead of World_Heritages$...; the
  # mojibake "?" characters in the annotation strings were restored to "ä".
  
    # FAVOURITE among the variants!
    # geom_text_repel tries to prevent labels from overlapping. This only
    # becomes clearly visible in an exported plot of roughly 1500x1500
    # pixels - not in the R plot preview. See Grafik_1 in "Anhang_Aufgabe_3".
    p + geom_text_repel(data = World_Heritages, aes(label = heritageLabel, fontface = "bold"), box.padding = unit(0.3, "lines")) + 
          annotate("text", x = 10, y = 55, label = "Weltkulturerbestätten und Weltnaturerbestätten in Deutschland",
          col="black", cex=6,
          fontface = "bold", alpha = 0.8)

    # geom_text with check_overlap drops some labels entirely, so this
    # variant is less suitable.
    p + geom_text(data = World_Heritages, aes(label = heritageLabel, fontface = "bold"), check_overlap=TRUE) + 
          annotate("text", x = 10, y = 55, label = "Weltkulturerbestätten und Weltnaturerbestätten in Deutschland",
          col="black", cex=6,
          fontface = "bold", alpha = 0.8)

    # geom_label looks nice, but overlapping cannot be avoided.
    p + geom_label(data = World_Heritages, aes(label = heritageLabel, fontface = "bold")) + 
          annotate("text", x = 10, y = 55, label = "Weltkulturerbestätten und Weltnaturerbestätten in Deutschland",
          col="black", cex=6,
          fontface = "bold", alpha = 0.8)

# Second mapping option without city and country names (clearer):
  
  # Build the map with get_googlemap from ggmap; the style parameter
  # simplifies roads and hides locality/country labels.
  map_googlemap <- get_googlemap(center = c(10.25828, 51.11484), zoom = 6, maptype = "terrain",
                       style = 'feature:road|element:all|visibility:simplified&style=feature:administrative.locality|element:labels|visibility:off&style=feature:administrative.country|element:labels|visibility:off')
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=51.11484,10.25828&zoom=6&size=640x640&scale=2&maptype=terrain&style=feature:road%7Celement:all%7Cvisibility:simplified&style=feature:administrative.locality%7Celement:labels%7Cvisibility:off&style=feature:administrative.country%7Celement:labels%7Cvisibility:off&sensor=false
  # Plot the heritage sites by longitude/latitude on the styled map.
  # FIX: bare column names in aes() instead of World_Heritages$... keep the
  # mapping aligned with the data= argument.
  p2 <- ggmap(map_googlemap) + geom_point(
    aes(x = lon, y = lat), col = "red", alpha = 0.4, size = 2, 
    data = World_Heritages)
  
  # Map with a point for every UNESCO World Heritage site in Germany.
  p2

  # Three labelling options (with an annotation as title):
    
    # FAVOURITE among the variants!
    # See Grafik_2 in "Anhang_Aufgabe_3".
    p2 + geom_text_repel(data = World_Heritages, aes(label = heritageLabel, fontface = "bold")) + 
          annotate("text", x = 10, y = 55, label = "Weltkulturerbestätten und Weltnaturerbestätten in Deutschland",
          col="black", cex=6,
          fontface = "bold", alpha = 0.8)

    # As above with the new map: labels look nice, but overlapping cannot be
    # avoided.
    p2 + geom_label(data = World_Heritages, aes(label = heritageLabel, fontface = "bold")) + 
           annotate("text", x = 10, y = 55, label = "Weltkulturerbestätten und Weltnaturerbestätten in Deutschland",
           col="black", cex=6,
           fontface = "bold", alpha = 0.8)

    # As above with the new map: check_overlap drops some labels, so this
    # variant is less suitable.
    p2 + geom_text(data = World_Heritages, aes(label = heritageLabel, fontface = "bold"), check_overlap=TRUE) + 
            annotate("text", x = 10, y = 55, label = "Weltkulturerbestätten und Weltnaturerbestätten in Deutschland",
            col="black", cex=6,
            fontface = "bold", alpha = 0.8)

#Zusätzliches zur Wikidata Query:

#Falls man die Jahre & Bilder der heritages mit hinzu nehmen möchte verliert man einen Großteil der Ergebnisse, da diese nicht überall eingepflegt sind.
  #Das wäre dann die zugehörige Query für Wikidata:
  World_Heritages = query_wikidata('#List of World Heritages in Germany
                                   #defaultView:Map
                                    SELECT DISTINCT ?heritage ?heritageLabel (YEAR(?date) as ?year) ?image ?lat ?lon
                                    WHERE 
                                    {
                                    ?heritage wdt:P1435 wd:Q9259 ;
                                    wdt:P17 wd:Q183 ;
                                    wdt:P18 ?image ;
                                    wdt:P571 ?date ;
                                    wdt:P625 ?coord .
                                    ?heritage p:P625 ?coordinate .
                                    ?coordinate psv:P625 ?coordinate_node .
                                    ?coordinate_node wikibase:geoLatitude ?lat .
                                    ?coordinate_node wikibase:geoLongitude ?lon .
                                    SERVICE wikibase:label { bd:serviceParam wikibase:language "en,de". }
                                    }
                                    ORDER BY ?date')
## 9 rows were returned by WDQS
#Falls man die Bezeichnungen für die Weltkulturerbestätten nur auf Deutsch bzw. Englisch möchte, verliert man teils Bezeichnungen von Entities und ersetzt diese durch kryptische Q-Entities aus Wikidata.
  #Daher wird neben "de" auch "en" in die Query aufgenommen.
require(tidyverse)
require(reshape2)
## Loading required package: reshape2
## Warning: package 'reshape2' was built under R version 3.2.5
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
require(ggplot2)
require(directlabels)
## Loading required package: directlabels
## Warning: package 'directlabels' was built under R version 3.2.5
# Match results for the 2016/17 Bundesliga season. The file is comma
# separated with "." decimals, so read.csv() is the correct reader:
# read.csv2(..., sep = ",") keeps dec = ",", which would misparse the
# decimal (odds) columns of this file.
bundesligatable <- read.csv("http://www.football-data.co.uk/mmz4281/1617/D1.csv",
                            header = TRUE, stringsAsFactors = FALSE)

# Keep columns 2:7 = Date, HomeTeam, AwayTeam, FTHG, FTAG, FTR.
bundesligatable2 <- select(bundesligatable, c(2:7))
# Stack HomeTeam/AwayTeam (columns 2 and 3) into one long column (named
# "HomeTeam"): each match appears twice, time == 1 for the home side's row
# and time == 2 for the away side's row.
newBundesligatable <- reshape(bundesligatable2, dir = "long", varying = list(c(2, 3)))

# Points per row: 3 for a home win on the home row, 3 for an away win on the
# away row, 1 for a draw on both rows, 0 otherwise. Vector indexing
# (df$col[cond] <- v) is safer than df[cond, ]$col <- v.
newBundesligatable$punkte <- 0
newBundesligatable$punkte[newBundesligatable$FTR == 'H' & newBundesligatable$time == 1] <- 3
newBundesligatable$punkte[newBundesligatable$FTR == 'A' & newBundesligatable$time == 2] <- 3
newBundesligatable$punkte[newBundesligatable$FTR == 'D'] <- 1

# All 18 clubs of the season (home and away names are stacked, so the
# unique() over the long column covers every club).
clublist <- unique(newBundesligatable$HomeTeam)

getClubData <- function(name){
    # Per-matchday points table for one club: all rows of the (global)
    # newBundesligatable where the club appears -- home and away rows both
    # live in the stacked HomeTeam column -- plus the running points total
    # and a matchday counter in row order.
    rows <- filter(newBundesligatable, HomeTeam == name)
    rows <- select(rows, HomeTeam, punkte, Date)
    mutate(rows,
           punkteSpieltag = cumsum(punkte),
           spieltag = row_number(HomeTeam))
}



createFinalTable <- function(df, clubName){
    # Append one club's cumulative points as a new column named after the
    # club. The original mutate()/colnames() dance created a column literally
    # named "clubName" and renamed it afterwards; `[[<-` with the dynamic
    # name does the same in one step.
    df[[clubName]] <- getClubData(clubName)$punkteSpieltag
    df
}


# Seed the result with the matchday index of an arbitrary club (all clubs
# play the same 34 matchdays), then append every club's cumulative points
# as its own column.
firstClub <- getClubData(clublist[1])
df <- firstClub %>%
  select(spieltag)

for (club in clublist) {
  df <- createFinalTable(df, club)
}


# Long format: one row per (matchday, club) with the points value.
meltDF <- melt(df, id = "spieltag")

# Club -> plot colour as a single named lookup, replacing 18 copy-pasted
# subset-assignments. Clubs without an entry keep "" exactly as before.
# (Augsburg and Mainz deliberately share 'red1', as in the original.)
team_colours <- c(
  "Bayern Munich" = "red",
  "Augsburg"      = "red1",
  "Dortmund"      = "yellow",
  "Ein Frankfurt" = "red2",
  "FC Koln"       = "firebrick",
  "M'gladbach"    = "green2",
  "Hertha"        = "blue",
  "Hoffenheim"    = "blue1",
  "Schalke 04"    = "blue2",
  "Darmstadt"     = "blue3",
  "Freiburg"      = "black",
  "Ingolstadt"    = "red4",
  "Leverkusen"    = "green3",
  "RB Leipzig"    = "blue4",
  "Wolfsburg"     = "green1",
  "Mainz"         = "red1",
  "Werder Bremen" = "green4",
  "Hamburg"       = "firebrick2"
)
meltDF$teamcolor <- unname(team_colours[as.character(meltDF$variable)])
meltDF$teamcolor[is.na(meltDF$teamcolor)] <- ""

# One (club, colour) row per club, in the order the clubs appear in meltDF
# (i.e. the factor-level order) -- scale_colour_manual() below relies on it.
colmapping <- unique(meltDF[c("variable","teamcolor")])



# Cumulative points per matchday: one coloured line per club, with the club
# name drawn directly at the last data point (directlabels::geom_dl).
g <- ggplot(data = meltDF,
            aes(x = spieltag, y = value, color = variable,
                group = variable, frame = spieltag)) +
  geom_line() +
  scale_colour_manual("variable", values = c(colmapping$teamcolor)) +
  scale_x_continuous(breaks = seq(1, 34, 1), limits = c(0, 36),
                     sec.axis = dup_axis(name = waiver())) +
  scale_y_continuous(breaks = seq(min(df[34, ]), max(df[34, ]), 1),
                     position = "right") +
  expand_limits(x = 1, y = 0) +
  geom_dl(aes(label = variable),
          method = list(dl.trans(x = x + 0.2), "last.points", cex = 0.6)) +
  ylab('Punkte nach dem letzten Spieltag') +
  coord_fixed(ratio = 0.3)

g

# Same data as a scatter plot, shown as one small panel per matchday via
# facet_wrap(), so the standings can be stepped through panel by panel.
g2 <- ggplot(data = meltDF,
             aes(x = spieltag, y = value, color = variable,
                 group = variable, frame = spieltag)) +
  geom_point() +
  scale_colour_manual("variable", values = c(colmapping$teamcolor)) +
  scale_x_continuous(breaks = seq(1, 34, 1), limits = c(0, 36),
                     sec.axis = dup_axis(name = waiver())) +
  scale_y_continuous(breaks = seq(min(df[34, ]), max(df[34, ]), 1),
                     position = "right") +
  expand_limits(x = 1, y = 0) +
  coord_fixed(ratio = 0.3)

g2 + facet_wrap(~spieltag)

Das Paket gganimate bekamen wir nicht zum Laufen.